16b840f9ad0ba7bd5408f0d96d154d3f8b9b05de,h2o-algos/src/main/java/hex/naivebayes/NaiveBayes.java,NaiveBayesDriver,computeStatsFillModel,#NaiveBayesModel#DataInfo#NBTask#,74
Before Change
new String[1][], new double[][] {apriori});
model._output._model_summary = createModelSummaryTable(model._output);
if(stop_requested()) return false;
_job.update(1, "Scoring and computing metrics on training data");
if (_parms._compute_metrics) {
model.score(_parms.train()).delete(); // This scores on the training data and appends a ModelMetrics
After Change
// Fragment of the model-filling step: turns the counts/sums accumulated in tsk into
// a-priori probabilities, per-predictor conditional statistics, and the display tables
// stored on model._output.
// Record the per-response-level counts and the number of categorical predictors on the model output.
model._output._rescnt = tsk._rescnt;
model._output._ncats = dinfo._cats;
// Abort (return false) only when stop_requested() is true and timeout() is false.
// NOTE(review): presumably this lets a timed-out build still finish filling the model -- confirm.
if(stop_requested() && !timeout()) return false;
_job.update(1, "Initializing arrays for model statistics");
// String[][] domains = dinfo._adaptedFrame.domains();
String[][] domains = model._output._domains;
// apriori has one slot per response level.
double[] apriori = new double[tsk._nrescat];
// pcond is indexed [predictor][response level][slot]. A predictor with a domain gets one slot
// per domain level; a predictor without one (domains[i] == null) gets 2 slots, which the numeric
// loop below fills with mean (slot 0) and standard deviation (slot 1).
double[][][] pcond = new double[tsk._npreds][][];
for(int i = 0; i < pcond.length; i++) {
int ncnt = domains[i] == null ? 2 : domains[i].length;
pcond[i] = new double[tsk._nrescat][ncnt];
}
if(stop_requested() && !timeout()) return false;
_job.update(1, "Computing probabilities for categorical cols");
// A-priori probability of response y, with Laplace smoothing:
// (count of level i + laplace) / (total observations + number of levels * laplace)
for(int i = 0; i < apriori.length; i++)
apriori[i] = ((double)tsk._rescnt[i] + _parms._laplace)/(tsk._nobs + tsk._nrescat * _parms._laplace);
// apriori[i] = tsk._rescnt[i]/tsk._nobs; // Note: R doesn't apply laplace smoothing to priors, even though this is textbook definition
// Probability of categorical predictor x_j conditional on response y, with Laplace smoothing:
// (joint count + laplace) / (count of response level i + number of predictor levels * laplace)
// The first dinfo._cats entries of pcond are treated as the categorical predictors.
for(int col = 0; col < dinfo._cats; col++) {
assert pcond[col].length == tsk._nrescat;
for(int i = 0; i < pcond[col].length; i++) {
for(int j = 0; j < pcond[col][i].length; j++)
pcond[col][i][j] = ((double)tsk._jntcnt[col][i][j] + _parms._laplace)/((double)tsk._rescnt[i] + domains[col].length * _parms._laplace);
}
}
if(stop_requested() && !timeout()) return false;
_job.update(1, "Computing mean and standard deviation for numeric cols");
// Mean and standard deviation of numeric predictor x_j for every level of response y
// Numeric predictors occupy pcond indices dinfo._cats .. dinfo._cats + dinfo._nums - 1.
for(int col = 0; col < dinfo._nums; col++) {
// pcond[0].length is the number of response levels (every pcond[i] was allocated with tsk._nrescat rows above).
for(int i = 0; i < pcond[0].length; i++) {
int cidx = dinfo._cats + col;
double num = tsk._rescnt[i];
// NOTE(review): presumably _jntsum[col][i][0] is the sum and [1] the sum of squares of the
// predictor within response level i -- confirm against NBTask.
double pmean = tsk._jntsum[col][i][0]/num;
pcond[cidx][i][0] = pmean;
// double pvar = tsk._jntsum[col][i][1]/num - pmean * pmean;
// Variance with an (num - 1) divisor: sumsq/(num-1) - mean^2 * num/(num-1).
// NOTE(review): num == 1 makes the divisor zero and num == 0 makes pmean 0/0; also a slightly
// negative pvar from floating-point cancellation would make Math.sqrt return NaN -- confirm
// whether upstream guarantees rule these cases out.
double pvar = tsk._jntsum[col][i][1]/(num - 1) - pmean * pmean * num/(num - 1);
pcond[cidx][i][1] = Math.sqrt(pvar);
}
}
// Keep the raw arrays on the model output alongside the display tables built below.
model._output._apriori_raw = apriori;
model._output._pcond_raw = pcond;
// Create table of conditional probabilities for every predictor
model._output._pcond = new TwoDimTable[pcond.length];
// Rows of every table are the response levels.
String[] rowNames = _response.domain();
// Categorical predictors: one column per predictor level.
// NOTE(review): column names come from _train.vec(col).domain() while pcond[col] was sized from
// model._output._domains[col] -- presumably these agree in length and order; confirm.
for(int col = 0; col < dinfo._cats; col++) {
String[] colNames = _train.vec(col).domain();
String[] colTypes = new String[colNames.length];
String[] colFormats = new String[colNames.length];
Arrays.fill(colTypes, "double");
Arrays.fill(colFormats, "%5f");
model._output._pcond[col] = new TwoDimTable(_train.name(col), null, rowNames, colNames, colTypes, colFormats,
"Y_by_" + _train.name(col), new String[rowNames.length][], pcond[col]);
}
// Numeric predictors: two fixed columns, "Mean" and "Std_Dev".
for(int col = 0; col < dinfo._nums; col++) {
int cidx = dinfo._cats + col;
model._output._pcond[cidx] = new TwoDimTable(_train.name(cidx), null, rowNames, new String[] {"Mean", "Std_Dev"},
new String[] {"double", "double"}, new String[] {"%5f", "%5f"}, "Y_by_" + _train.name(cidx),
new String[rowNames.length][], pcond[cidx]);
}
// Create table of a-priori probabilities for the response
// Single-row table with one column per response level.
String[] colTypes = new String[_response.cardinality()];
String[] colFormats = new String[_response.cardinality()];
Arrays.fill(colTypes, "double");
Arrays.fill(colFormats, "%5f");
model._output._apriori = new TwoDimTable("A Priori Response Probabilities", null, new String[1], _response.domain(), colTypes, colFormats, "",
new String[1][], new double[][] {apriori});
model._output._model_summary = createModelSummaryTable(model._output);
if(stop_requested() && !timeout()) return false;
_job.update(1, "Scoring and computing metrics on training data");
if (_parms._compute_metrics) {
model.score(_parms.train()).delete(); // This scores on the training data and appends a ModelMetrics